library(Seurat)
## The legacy packages maptools, rgdal, and rgeos, underpinning the sp package,
## which was just loaded, will retire in October 2023.
## Please refer to R-spatial evolution reports for details, especially
## https://r-spatial.org/r/2023/05/15/evolution4.html.
## It may be desirable to make the sf package available;
## package maintainers should consider adding sf to Suggests:.
## The sp package is now running under evolution status 2
## (status 2 uses the sf package in place of rgdal)
## Attaching SeuratObject
library(SeuratDisk)
## Registered S3 method overwritten by 'SeuratDisk':
## method from
## as.sparse.H5Group Seurat
library(decoupleR)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tibble)
library(tidyr)
library(patchwork)
library(ggplot2)
library(pheatmap)
library(OmnipathR)
library(SingleR)
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
##
## Attaching package: 'matrixStats'
## The following object is masked from 'package:dplyr':
##
## count
##
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
##
## colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
## colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
## colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
## colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
## colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
## colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
## colWeightedMeans, colWeightedMedians, colWeightedSds,
## colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
## rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
## rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
## rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
## rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
## rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
## rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
## rowWeightedSds, rowWeightedVars
## Loading required package: GenomicRanges
## Loading required package: stats4
## Loading required package: BiocGenerics
##
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:dplyr':
##
## combine, intersect, setdiff, union
## The following objects are masked from 'package:stats':
##
## IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
##
## anyDuplicated, aperm, append, as.data.frame, basename, cbind,
## colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
## get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
## match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
## Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
## table, tapply, union, unique, unsplit, which.max, which.min
## Loading required package: S4Vectors
##
## Attaching package: 'S4Vectors'
## The following object is masked from 'package:tidyr':
##
## expand
## The following objects are masked from 'package:dplyr':
##
## first, rename
## The following objects are masked from 'package:base':
##
## expand.grid, I, unname
## Loading required package: IRanges
##
## Attaching package: 'IRanges'
## The following objects are masked from 'package:dplyr':
##
## collapse, desc, slice
## Loading required package: GenomeInfoDb
## Loading required package: Biobase
## Welcome to Bioconductor
##
## Vignettes contain introductory material; view with
## 'browseVignettes()'. To cite Bioconductor, see
## 'citation("Biobase")', and for packages 'citation("pkgname")'.
##
## Attaching package: 'Biobase'
## The following object is masked from 'package:MatrixGenerics':
##
## rowMedians
## The following objects are masked from 'package:matrixStats':
##
## anyMissing, rowMedians
## Warning: multiple methods tables found for 'aperm'
## Warning: replacing previous import 'BiocGenerics::aperm' by
## 'DelayedArray::aperm' when loading 'SummarizedExperiment'
##
## Attaching package: 'SummarizedExperiment'
## The following object is masked from 'package:SeuratObject':
##
## Assays
## The following object is masked from 'package:Seurat':
##
## Assays
Load seurat object
#Convert("all_samples.h5ad", dest = "h5seurat", overwrite = T)
seurat_anndata = LoadH5Seurat("all_samples.h5seurat", assays = "RNA")
## Validating h5Seurat file
## Initializing RNA with data
## Adding counts for RNA
## Adding scale.data for RNA
## Adding feature-level metadata for RNA
## Adding reduction harmony
## Adding cell embeddings for harmony
## Adding miscellaneous information for harmony
## Adding reduction pca
## Adding cell embeddings for pca
## Adding feature loadings for pca
## Adding miscellaneous information for pca
## Adding reduction umap
## Adding cell embeddings for umap
## Adding miscellaneous information for umap
## Adding command information
## Adding cell-level metadata
Check samples that have both single cell and bulk data
levels(seurat_anndata@meta.data$sample)
## [1] "8356" "11522" "11817" "11918" "12889" "12929" "12935" "13634" "13636"
## [10] "13774" "14428" "14958" "14965" "15002" "15467"
clinical.data <- data.frame(read.csv("/home/marcelo/LungPredict1/RawFiles/ColumnData_Vanderbilt.csv", row.names = 1))
clinical.data <- clinical.data[,c(1,5,6,9,11,13)]
coldata = clinical.data[clinical.data$pt_ID%in%levels(seurat_anndata@meta.data$sample),]
#14958 14965 11817 13634 15467 12929 8356 12889 15002
#"R3388_YZ_1" "R3388_YZ_2" "R3388_YZ_10" "R3388_YZ_11" "R3388_YZ_18" "R3388_YZ_28" "R3388_YZ_56" "R4163_YZ_16" "R4163_YZ_25"
Identify clusters
DimPlot(seurat_anndata, reduction="umap")
ElbowPlot(seurat_anndata) #determine dimensionality of data
seurat_anndata = FindNeighbors(seurat_anndata, dims = 1:20)
## Computing nearest neighbor graph
## Computing SNN
seurat_anndata = FindClusters(seurat_anndata, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 44876
## Number of edges: 1487565
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8737
## Number of communities: 25
## Elapsed time: 11 seconds
DimPlot(seurat_anndata, reduction="umap", label = T)
Cell annotation (ATLAS)
ref = celldex::HumanPrimaryCellAtlasData()
## Warning: Could not check database for updates.
## Database source currently unreachable.
## This should only be a temporary interruption.
## Using previously cached version.
## snapshotDate(): 2022-04-26
## see ?celldex and browseVignettes('celldex') for documentation
## Could not check id: EH3492 for updates.
## Using previously cached version.
## loading from cache
## Could not check id: EH3492 for updates.
## Using previously cached version.
## see ?celldex and browseVignettes('celldex') for documentation
## Could not check id: EH3493 for updates.
## Using previously cached version.
## loading from cache
## Could not check id: EH3493 for updates.
## Using previously cached version.
single_counts = GetAssayData(seurat_anndata, slot='counts')
pred = SingleR(test = single_counts,
ref = ref,
labels = ref$label.main)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
seurat_anndata$singleR.labels = pred$labels[match(rownames(seurat_anndata@meta.data), rownames(pred))]
DimPlot(seurat_anndata, reduction = 'umap', group.by = 'singleR.labels')
head(pred$scores)
## Astrocyte B_cell BM BM & Prog. Chondrocytes CMP DC
## [1,] 0.1288609 0.2454694 0.2120456 0.1785377 0.1596201 0.2348847 0.2354978
## [2,] 0.1397776 0.3030350 0.2810913 0.2213133 0.1728824 0.2685374 0.2804226
## [3,] 0.1475280 0.2574010 0.2716059 0.1651107 0.1920963 0.2424944 0.4121792
## [4,] 0.1119149 0.2616144 0.2548422 0.1872333 0.1379515 0.2385703 0.2368478
## [5,] 0.1599564 0.2974818 0.2737211 0.1974025 0.1841983 0.2644691 0.2769523
## [6,] 0.1323993 0.2789778 0.2375703 0.1940324 0.1651088 0.2457796 0.2610297
## Embryonic_stem_cells Endothelial_cells Epithelial_cells Erythroblast
## [1,] 0.11055616 0.1720191 0.1649112 0.1711534
## [2,] 0.14682796 0.2061413 0.1984771 0.2102717
## [3,] 0.11516413 0.2243761 0.2078711 0.1646774
## [4,] 0.09878323 0.1655049 0.1424403 0.1646180
## [5,] 0.15609302 0.1984647 0.1890662 0.1865274
## [6,] 0.12805750 0.1919904 0.1725622 0.1855369
## Fibroblasts Gametocytes GMP Hepatocytes HSC_-G-CSF HSC_CD34+
## [1,] 0.1482040 0.1447390 0.2480521 0.1421683 0.2486675 0.2373476
## [2,] 0.1682255 0.1479854 0.2823766 0.1834610 0.3267963 0.2644097
## [3,] 0.2031987 0.1604163 0.2833434 0.2027688 0.3257528 0.2971054
## [4,] 0.1292689 0.1043757 0.2480834 0.1207127 0.2895977 0.2267591
## [5,] 0.1721161 0.1500515 0.2796854 0.1733605 0.3159671 0.2598502
## [6,] 0.1643852 0.1329930 0.2548679 0.1562751 0.2895931 0.2345147
## iPS_cells Keratinocytes Macrophage MEP Monocyte MSC Myelocyte
## [1,] 0.1394411 0.1544705 0.2300599 0.2011366 0.2573516 0.1407319 0.2235121
## [2,] 0.1524905 0.1699264 0.2840576 0.2351096 0.3025898 0.1558683 0.2639280
## [3,] 0.1636068 0.1870450 0.4078145 0.1920928 0.3987618 0.1676612 0.2898561
## [4,] 0.1208349 0.1025808 0.2383048 0.2018317 0.2600235 0.1223021 0.2282195
## [5,] 0.1606894 0.1669182 0.2726076 0.2209953 0.2963006 0.1623042 0.2597675
## [6,] 0.1474047 0.1368754 0.2519917 0.2120243 0.2768807 0.1484591 0.2398599
## Neuroepithelial_cell Neurons Neutrophils NK_cell Osteoblasts Platelets
## [1,] 0.09700509 0.1359534 0.2537397 0.3069176 0.1431023 0.1829822
## [2,] 0.11660883 0.1569972 0.2927893 0.3508412 0.1509951 0.2332163
## [3,] 0.09467130 0.1909475 0.3675563 0.2717566 0.1831689 0.2175084
## [4,] 0.07989130 0.1365315 0.2374779 0.3070698 0.1262604 0.2248319
## [5,] 0.12961705 0.1834586 0.2782209 0.3644448 0.1620286 0.2414834
## [6,] 0.10334531 0.1631646 0.2559404 0.3233688 0.1526883 0.2199729
## Pre-B_cell_CD34- Pro-B_cell_CD34+ Pro-Myelocyte Smooth_muscle_cells
## [1,] 0.2703212 0.2400679 0.2212136 0.1496663
## [2,] 0.3417694 0.2794768 0.2570039 0.1643441
## [3,] 0.3381400 0.2465303 0.2703544 0.1918451
## [4,] 0.2981949 0.2452074 0.2285450 0.1330463
## [5,] 0.3316083 0.2753085 0.2575988 0.1716137
## [6,] 0.3012617 0.2530245 0.2269616 0.1630644
## T_cells Tissue_stem_cells
## [1,] 0.2784824 0.1696429
## [2,] 0.3633034 0.1775285
## [3,] 0.2523547 0.2081034
## [4,] 0.3129331 0.1419393
## [5,] 0.3636598 0.1965812
## [6,] 0.3329824 0.1699385
plotScoreHeatmap(pred)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
plotDeltaDistribution(pred)
## Warning: useNames = NA is deprecated. Instead, specify either useNames = TRUE
## or useNames = TRUE.
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(data$density): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_ydensity()`
## Caused by error in `$<-.data.frame`:
## ! replacement has 1 row, data has 0
## Warning: Groups with fewer than two data points have been dropped.
## Warning in max(data$density): no non-missing arguments to max; returning -Inf
## Warning: Computation failed in `stat_ydensity()`
## Caused by error in `$<-.data.frame`:
## ! replacement has 1 row, data has 0
tab = table(Assigned = pred$labels, Clusters = seurat_anndata$seurat_clusters)
pheatmap(log10(tab+10), color=colorRampPalette(c('white', 'blue'))(10))
Find markers for NK cells (NK cluster vs all others)
Idents(seurat_anndata) <- "singleR.labels"
markers_NK = FindMarkers(seurat_anndata, ident.1 = "NK_cell")
head(markers_NK, n = 10)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## FCRL6 0 0.3035116 0.166 0.022 0
## FCER1G 0 0.4691574 0.260 0.054 0
## FCGR3A 0 0.5388145 0.245 0.027 0
## SH2D1B 0 0.3822864 0.168 0.001 0
## XCL2 0 0.8452015 0.438 0.098 0
## XCL1 0 0.7461519 0.381 0.095 0
## GNLY 0 1.4889653 0.636 0.089 0
## FGFBP2 0 0.5280948 0.212 0.009 0
## CLIC3 0 0.4715471 0.273 0.058 0
## CTSW 0 0.8725013 0.687 0.238 0
features = rownames(markers_NK)[1:10]
FeaturePlot(seurat_anndata, features = features)
Extract NK cluster for further analysis
NK = subset(seurat_anndata, idents = "NK_cell", invert = FALSE)
DimPlot(NK, reduction="umap")
Cluster identification using NK cells
ElbowPlot(NK) #determine dimensionality of data
NK<- FindNeighbors(NK, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
NK<- FindClusters(NK, algorithm= 1, resolution = 0.15, verbose = T, graph.name = "RNA_snn")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2335
## Number of edges: 78630
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9034
## Number of communities: 4
## Elapsed time: 0 seconds
NK<- RunUMAP(NK, dims = 1:10)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 17:47:29 UMAP embedding parameters a = 0.9922 b = 1.112
## 17:47:29 Read 2335 rows and found 10 numeric columns
## 17:47:29 Using Annoy for neighbor search, n_neighbors = 30
## 17:47:29 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 17:47:29 Writing NN index file to temp file /tmp/RtmpaueZ2j/file4a35d36e3c4e2
## 17:47:29 Searching Annoy index using 1 thread, search_k = 3000
## 17:47:30 Annoy recall = 100%
## 17:47:30 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 17:47:31 Initializing from normalized Laplacian + noise (using irlba)
## 17:47:31 Commencing optimization for 500 epochs, with 99448 positive edges
## 17:47:33 Optimization finished
DimPlot(NK, reduction="umap", label = T, pt.size = 0.5) +
NoLegend() + ggtitle('NK cells')
Plot samples from bulk based on their clusters
clinical.data <- data.frame(read.csv("/home/marcelo/LungPredict1/RawFiles/ColumnData_Vanderbilt.csv", row.names = 1))
clinical.data <- clinical.data[,c(1,5,6,9,11,13)]
coldata = clinical.data[clinical.data$pt_ID%in%levels(NK@meta.data$sample),]
#"R3388_YZ_1" "R3388_YZ_2" "R3388_YZ_10" "R3388_YZ_11" "R3388_YZ_18" "R3388_YZ_28" "R3388_YZ_56" "R4163_YZ_16" "R4163_YZ_25"
# "R3388_YZ_1" ---> Cluster 2
# "R3388_YZ_2" ---> Cluster 1
# "R3388_YZ_10" ---> Cluster 1
# "R3388_YZ_28" ---> Cluster 1
cluster1 = c("14965", "11817", "12929") #"R3388_YZ_2", "R3388_YZ_10", "R3388_YZ_28"
cluster2 = c("14958") #R3388_YZ_1
NK[['Clusters_bulk']] = FALSE
NK@meta.data$Clusters_bulk[which(NK@meta.data$sample%in%cluster1)] = 'Bulk cluster 1'
NK@meta.data$Clusters_bulk[which(NK@meta.data$sample%in%cluster2)] = 'Bulk cluster 2'
cell_values <- c("Bulk cluster 1", "Bulk cluster 2")
nk2 = subset(NK, subset = Clusters_bulk == cell_values)
## Warning in Clusters_bulk == cell_values: longer object length is not a multiple
## of shorter object length
p2 = DimPlot(nk2, reduction="umap", group.by = 'Clusters_bulk') +
ggtitle('Samples from bulk')
p2
Find all markers across the different NK subclusters
markers = FindAllMarkers(NK,
logfc.threshold = 0.25, #min logfc
min.pct = 0.1, #genes that are detected on 50% frequency across clusters
only.pos = T,
test.use = 'wilcox',
slot = 'counts')
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
head(markers[which(markers$cluster==0),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## NKG7 8.652349e-167 0.6964493 0.967 0.624 2.068344e-162 0 NKG7
## GNLY 3.215519e-131 0.8971343 0.871 0.412 7.686698e-127 0 GNLY
## KLRD1 6.268599e-126 0.7644680 0.862 0.349 1.498509e-121 0 KLRD1
## PRF1 2.733930e-108 0.7737494 0.728 0.271 6.535461e-104 0 PRF1
## FCGR3A 1.777406e-100 0.8160589 0.438 0.062 4.248888e-96 0 FCGR3A
head(markers[which(markers$cluster==1),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## IL7R 6.556224e-40 0.5852136 0.411 0.186 1.567265e-35 1 IL7R
## GPR183 7.638977e-33 0.3774060 0.201 0.045 1.826098e-28 1 GPR183
## LTB 9.100511e-12 0.2646472 0.231 0.131 2.175477e-07 1 LTB
## GZMK 1.002142e-08 0.2576632 0.307 0.218 2.395621e-04 1 GZMK
head(markers[which(markers$cluster==2),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## TPSAB1 0 2.461008 1.000 0.005 0 2 TPSAB1
## TPSB2 0 2.128847 0.933 0.004 0 2 TPSB2
## CPA3 0 1.983955 0.950 0.001 0 2 CPA3
## TPSD1 0 1.619882 0.733 0.001 0 2 TPSD1
## HPGDS 0 1.498396 0.750 0.001 0 2 HPGDS
head(markers[which(markers$cluster==3),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## UBE2C 0.000000e+00 1.2353218 0.733 0.002 0.000000e+00 3 UBE2C
## ASPM 0.000000e+00 1.2111591 0.767 0.003 0.000000e+00 3 ASPM
## AURKB 5.936434e-288 0.8465231 0.567 0.000 1.419105e-283 3 AURKB
## MKI67 5.027183e-192 1.0389229 0.667 0.007 1.201748e-187 3 MKI67
## DLGAP5 1.086590e-187 0.6385642 0.400 0.000 2.597492e-183 3 DLGAP5
features = c("NKG7", "IL7R", "TPSAB1", "UBE2C")
RidgePlot(NK, features = features, ncol = 2)
## Picking joint bandwidth of 0.203
## Picking joint bandwidth of 0.0799
## Picking joint bandwidth of 0.0491
## Picking joint bandwidth of 0.112
VlnPlot(NK, features = features, ncol = 2)
FeaturePlot(NK, features = features, ncol = 2)
Differential expressed markers across NK clusters 0 and 1
markers = FindMarkers(NK, ident.1 = "0", ident.2 = "1")
head(markers, n=20)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## NKG7 1.753038e-141 0.6228464 0.967 0.670 4.190638e-137
## GNLY 5.391246e-110 0.8300237 0.871 0.440 1.288777e-105
## KLRD1 8.002077e-105 0.7193598 0.862 0.368 1.912897e-100
## PRF1 3.028386e-94 0.7471176 0.728 0.281 7.239356e-90
## FCGR3A 6.373991e-91 0.8168854 0.438 0.061 1.523702e-86
## FGFBP2 6.023619e-87 0.8104449 0.394 0.041 1.439946e-82
## PFN1 3.827231e-75 0.3861626 0.974 0.762 9.148996e-71
## CTSW 1.631863e-74 0.5593572 0.882 0.511 3.900969e-70
## ADGRG1 2.662240e-73 0.6181761 0.368 0.048 6.364084e-69
## CST7 4.726177e-72 0.4841510 0.945 0.635 1.129793e-67
## GZMH 8.327237e-72 0.6619931 0.711 0.323 1.990626e-67
## GZMB 1.124822e-68 0.5970926 0.829 0.445 2.688887e-64
## CD247 1.650797e-65 0.5973813 0.668 0.294 3.946230e-61
## TYROBP 1.970991e-65 0.6673738 0.655 0.291 4.711654e-61
## ITGB2 1.095843e-62 0.5754744 0.699 0.344 2.619612e-58
## KLRC2 5.906606e-58 0.6114030 0.421 0.112 1.411974e-53
## KLRC3 8.634346e-58 0.5801401 0.443 0.131 2.064040e-53
## GZMA 3.678031e-52 0.5325601 0.755 0.422 8.792334e-48
## FCRL6 8.690340e-52 0.4724638 0.293 0.046 2.077426e-47
## HCST 6.969426e-50 0.3991031 0.896 0.633 1.666041e-45
features = c("NKG7", "GNLY", "KLRD1", "GZMH", "GZMB", "GZMA")
#features = c('CCL3', 'TIGIT', 'CD69','IRF8','IL2RB','CD160') markers from Pierre-Paul
FeaturePlot(NK, features = features, ncol = 3)
VlnPlot(NK, features = features, ncol = 3)
RidgePlot(NK, features = features, ncol = 3)
## Picking joint bandwidth of 0.203
## Picking joint bandwidth of 0.361
## Picking joint bandwidth of 0.194
## Picking joint bandwidth of 0.241
## Picking joint bandwidth of 0.243
## Picking joint bandwidth of 0.254
TFs inference from single cell data
Extract TFs network from Dorothea database
dorothea2viper_regulons <- function(df) {
regulon_list <- split(df, df$tf)
viper_regulons <- lapply(regulon_list, function(regulon) {
tfmode <- stats::setNames(regulon$mor, regulon$target)
list(tfmode = tfmode, likelihood = rep(1, length(tfmode)))
})
return(viper_regulons)
}
data("dorothea_hs", package = "dorothea")
regulons <- dorothea_hs %>%
filter(confidence %in% c("A", "B", "C"))
Extracting only variable features from NK cluster to calculate TFs activity
NK <- FindVariableFeatures(NK, selection.method = "vst", nfeatures = 10000)
var_feat = VariableFeatures(NK)
nk_top <- GetAssayData(NK)[var_feat,]
Run Univariate linear model (ULM) of TFs in NK object
# Extract the normalized log-transformed counts
mat <- as.matrix(nk_top)
# Run ulm
acts <- run_ulm(mat=mat, net=regulons, .source='tf', .target='target',
.mor='mor', minsize = 5)
head(acts)
## # A tibble: 6 × 5
## statistic source condition score p_value
## <chr> <chr> <chr> <dbl> <dbl>
## 1 ulm AHR AAACCTGAGCTTTGGT-1 0.768 0.442
## 2 ulm AHR AAAGTAGAGGCCCTCA-1 2.42 0.0154
## 3 ulm AHR AACCATGCAGGATCGA-1 1.47 0.141
## 4 ulm AHR AACTCCCAGTGGTCCC-1 0.832 0.405
## 5 ulm AHR AACTGGTCACACAGAG-1 2.42 0.0155
## 6 ulm AHR ACACTGACAGCTGGCT-1 -0.708 0.479
Save TFs results in Seurat object
# Extract ulm and store it in tfsulm in pbmc
NK[['tfsulm']] <- acts %>%
pivot_wider(id_cols = 'source', names_from = 'condition',
values_from = 'score') %>%
column_to_rownames('source') %>%
Seurat::CreateAssayObject(.)
# Change assay
DefaultAssay(object = NK) <- "tfsulm"
# Scale the data
NK <- ScaleData(NK)
## Centering and scaling data matrix
NK@assays$tfsulm@data <- NK@assays$tfsulm@scale.data
Find all TFs markers of NK subclusters
markers = FindAllMarkers(NK,
logfc.threshold = 0.25, #min logfc
min.pct = 0.1, #genes that are detected on 50% frequency across clusters
only.pos = T,
test.use = 'wilcox',
slot = 'counts')
## Calculating cluster 0
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.2, drop = FALSE]): NaNs produced
## Calculating cluster 1
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 2
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Calculating cluster 3
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
## Warning in mean.fxn(object[features, cells.1, drop = FALSE]): NaNs produced
head(markers[which(markers$cluster==0),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## STAT4 5.320614e-111 0.6463635 0.989 0.882 1.372718e-108 0 STAT4
## SPI1 5.532616e-80 0.3948460 1.000 0.951 1.427415e-77 0 SPI1
## STAT5B 1.636637e-76 0.7454263 0.901 0.655 4.222523e-74 0 STAT5B
## LYL1 4.167052e-70 0.4508138 0.989 0.934 1.075099e-67 0 LYL1
## STAT5A 8.659311e-64 0.6558171 0.894 0.705 2.234102e-61 0 STAT5A
head(markers[which(markers$cluster==1),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## SOX11 2.126740e-40 0.5915957 0.176 0.129 5.486989e-38 1 SOX11
## HNF4A 6.340450e-27 1.1474394 0.192 0.103 1.635836e-24 1 HNF4A
## FOXA1 3.236662e-17 0.3623624 0.627 0.572 8.350588e-15 1 FOXA1
## SIX5 1.666013e-09 0.2784527 0.204 0.166 4.298315e-07 1 SIX5
## AHR 6.233444e-09 0.2512032 0.727 0.671 1.608229e-06 1 AHR
head(markers[which(markers$cluster==2),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## FOSL2 4.763272e-47 2.069377 0.800 0.116 1.228924e-44 2 FOSL2
## FOSL1 1.754978e-45 1.668614 0.783 0.144 4.527844e-43 2 FOSL1
## GATA2 1.403975e-44 1.042382 0.975 0.833 3.622256e-42 2 GATA2
## NR1H2 1.575353e-37 1.018966 0.958 0.753 4.064411e-35 2 NR1H2
## HOXA9 2.698197e-37 0.806508 0.992 0.842 6.961349e-35 2 HOXA9
head(markers[which(markers$cluster==3),], n =5)
## p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene
## E2F4 5.404920e-20 1.1177037 1.000 0.991 1.394469e-17 3 E2F4
## SMAD5 9.025174e-17 1.0043861 1.000 0.879 2.328495e-14 3 SMAD5
## TFDP1 1.247243e-15 1.9411721 0.933 0.298 3.217887e-13 3 TFDP1
## MYC 1.633618e-12 0.5067407 1.000 0.999 4.214733e-10 3 MYC
## E2F1 2.455767e-10 0.8215032 0.967 0.864 6.335878e-08 3 E2F1
features = c("STAT4", "SOX11", "FOSL2", "E2F4")
RidgePlot(NK, features = features, ncol = 2)
## Picking joint bandwidth of 0.254
## Picking joint bandwidth of 0.205
## Picking joint bandwidth of 0.187
## Picking joint bandwidth of 0.295
VlnPlot(NK, features = features, ncol = 2)
(FeaturePlot(NK, features = features, ncol = 2)&
scale_colour_gradient2(low = 'blue', mid = 'white', high = 'red'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Clustering using protein activity
NK<- FindNeighbors(NK, dims = 1:10)
## Computing nearest neighbor graph
## Computing SNN
NK<- FindClusters(NK, algorithm= 1, resolution = 0.15, verbose = T, graph.name = "RNA_snn")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 2335
## Number of edges: 78630
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9034
## Number of communities: 4
## Elapsed time: 0 seconds
NK<- RunUMAP(NK, dims = 1:10)
## 18:10:36 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:10:36 Read 2335 rows and found 10 numeric columns
## 18:10:36 Using Annoy for neighbor search, n_neighbors = 30
## 18:10:36 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:10:36 Writing NN index file to temp file /tmp/RtmpaueZ2j/file4a35d509c3bec
## 18:10:36 Searching Annoy index using 1 thread, search_k = 3000
## 18:10:37 Annoy recall = 100%
## 18:10:38 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 18:10:39 Initializing from normalized Laplacian + noise (using irlba)
## 18:10:39 Commencing optimization for 500 epochs, with 99448 positive edges
## 18:10:41 Optimization finished
DimPlot(NK, reduction="umap", label = T, pt.size = 0.5) +
NoLegend() + ggtitle('NK cells')
Differential TFs across cluster 0 vs 1 NK
markers = FindMarkers(NK, ident.1 = "0", ident.2 = "1")
head(markers, n=30)
## p_val avg_log2FC pct.1 pct.2 p_val_adj
## STAT4 1.598788e-98 1.3639683 0.689 0.305 4.124873e-96
## FOSL2 1.950545e-96 -0.4735070 0.099 0.144 5.032407e-94
## NR2F2 1.586578e-90 -0.2809897 0.141 0.525 4.093372e-88
## FOSL1 9.154850e-76 -0.3367654 0.139 0.145 2.361951e-73
## LYL1 4.856518e-72 1.1837349 0.652 0.312 1.252982e-69
## SPI1 6.232210e-65 1.0212166 0.683 0.367 1.607910e-62
## STAT5B 5.210121e-63 1.0401843 0.646 0.317 1.344211e-60
## LEF1 6.475405e-58 -0.4355620 0.191 0.204 1.670655e-55
## STAT5A 3.683171e-52 0.9316636 0.632 0.349 9.502581e-50
## E2F2 4.468685e-50 -0.2623321 0.169 0.327 1.152921e-47
## SOX11 7.375228e-47 -0.5698739 0.260 0.306 1.902809e-44
## BHLHE22 1.436480e-45 -0.9505308 0.415 0.655 3.706119e-43
## TBX21 3.531328e-39 0.7326470 0.646 0.402 9.110827e-37
## HNF4A 5.151237e-34 -0.7172014 0.340 0.438 1.329019e-31
## E2F3 4.104912e-31 -0.4183996 0.365 0.360 1.059067e-28
## NEUROD1 6.400024e-30 0.7049793 0.557 0.363 1.651206e-27
## IRF1 8.340040e-30 0.6457881 0.620 0.425 2.151730e-27
## KLF4 1.176138e-27 -0.2908154 0.331 0.341 3.034436e-25
## BCL6 2.450209e-25 -0.6426935 0.411 0.598 6.321538e-23
## TEAD2 6.182513e-22 0.5937138 0.550 0.380 1.595088e-19
## TP73 6.198866e-22 0.6276116 0.546 0.352 1.599307e-19
## BATF 1.332879e-21 0.5992313 0.572 0.405 3.438829e-19
## IRF4 9.456509e-21 0.5299259 0.612 0.414 2.439779e-18
## ZEB1 1.333514e-20 -0.7035420 0.439 0.617 3.440467e-18
## IRF3 2.664850e-20 0.5283186 0.590 0.432 6.875314e-18
## RUNX1 4.149663e-19 0.5350491 0.560 0.384 1.070613e-16
## ZNF24 6.118171e-19 0.2943659 0.597 0.624 1.578488e-16
## STAT1 7.218552e-19 0.5205671 0.597 0.436 1.862386e-16
## ZNF274 3.720179e-18 0.3329226 0.621 0.654 9.598061e-16
## GABPA 4.614047e-18 0.5349285 0.543 0.393 1.190424e-15
features = c("STAT4", "TBX21", "FOSL2", "NR2F2", "BATF", "RUNX1","IRF4","BCL6")
(FeaturePlot(NK, features = features, ncol = 2)&
scale_colour_gradient2(low = 'blue', mid = 'white', high = 'red'))
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
VlnPlot(NK, features = features, ncol = 2)
RidgePlot(NK, features = features, ncol = 2)
## Picking joint bandwidth of 0.254
## Picking joint bandwidth of 0.241
## Picking joint bandwidth of 0.187
## Picking joint bandwidth of 0.0664
## Picking joint bandwidth of 0.259
## Picking joint bandwidth of 0.289
## Picking joint bandwidth of 0.268
## Picking joint bandwidth of 0.265
Visualization and comparison of results from gene expression and protein activity
p1 <- DimPlot(NK, reduction = "umap", label = TRUE, pt.size = 0.5) +
NoLegend() + ggtitle('Cell types')
p2 <- (FeaturePlot(NK, features = c("STAT4")) &
scale_colour_gradient2(low = 'blue', mid = 'white', high = 'red')) +
ggtitle('STAT4 activity')
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
DefaultAssay(object = NK) <- "RNA"
NK@assays$RNA@data <- NK@assays$RNA@scale.data
p3 <- (FeaturePlot(NK, features = c("STAT4")) &
scale_colour_gradient2(low = 'blue', mid = 'white', high = 'red')) + ggtitle('STAT4 expression')
## Warning: Could not find STAT4 in the default search locations, found in tfsulm
## assay instead
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
DefaultAssay(object = NK) <- "tfsulm"
p1 | p2 | p3
Top 30 variable TFs across NK clusters
n_tfs <- 30
# Extract activities from object as a long dataframe
df <- t(as.matrix(NK@assays$tfsulm@data)) %>%
as.data.frame() %>%
mutate(cluster = Idents(NK)) %>%
pivot_longer(cols = -cluster, names_to = "source", values_to = "score") %>%
group_by(cluster, source) %>%
summarise(mean = mean(score))
## `summarise()` has grouped output by 'cluster'. You can override using the
## `.groups` argument.
# Get top tfs with more variable means across clusters
tfs <- df %>%
group_by(source) %>%
summarise(std = sd(mean)) %>%
arrange(-abs(std)) %>%
head(n_tfs) %>%
pull(source)
# Subset long data frame to top tfs and transform to wide matrix
top_acts_mat <- df %>%
filter(source %in% tfs) %>%
pivot_wider(id_cols = 'cluster', names_from = 'source',
values_from = 'mean') %>%
column_to_rownames('cluster') %>%
as.matrix()
# Plot
pheatmap(top_acts_mat, border_color = NA)
Features plot of TFs found in bulk analysis
(FeaturePlot(NK, features = c("TBX21","PAX5", "LYL1", "HIF1A", "CXXC4")) &
scale_colour_gradient2(low = 'blue', mid = 'white', high = 'red'))
## Warning in FetchData.Seurat(object = object, vars = c(dims, "ident", features),
## : The following requested variables were not found: CXXC4
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
## Scale for colour is already present.
## Adding another scale for colour, which will replace the existing scale.
Re-clustering using only subset of patients with bulk
nk2 = subset(NK, subset = Clusters_bulk == cell_values)
## Warning in Clusters_bulk == cell_values: longer object length is not a multiple
## of shorter object length
ElbowPlot(nk2) #determine dimensionality of data
nk2<- FindNeighbors(nk2, dims = 1:15)
## Computing nearest neighbor graph
## Computing SNN
nk2<- FindClusters(nk2, algorithm= 1, resolution = 0.15, verbose = T, graph.name = "RNA_snn")
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
##
## Number of nodes: 341
## Number of edges: 11808
##
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8596
## Number of communities: 2
## Elapsed time: 0 seconds
nk2<- RunUMAP(nk2, dims = 1:10)
## 18:10:49 UMAP embedding parameters a = 0.9922 b = 1.112
## 18:10:49 Read 341 rows and found 10 numeric columns
## 18:10:49 Using Annoy for neighbor search, n_neighbors = 30
## 18:10:49 Building Annoy index with metric = cosine, n_trees = 50
## 0% 10 20 30 40 50 60 70 80 90 100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 18:10:49 Writing NN index file to temp file /tmp/RtmpaueZ2j/file4a35d79b772c3
## 18:10:49 Searching Annoy index using 1 thread, search_k = 3000
## 18:10:49 Annoy recall = 100%
## 18:10:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
## 18:10:50 Initializing from normalized Laplacian + noise (using irlba)
## 18:10:50 Commencing optimization for 500 epochs, with 14166 positive edges
## 18:10:51 Optimization finished
DimPlot(nk2, reduction="umap", label = T, pt.size = 0.5) +
NoLegend() + ggtitle('NK cells')
DimPlot(nk2, reduction="umap", group.by = 'Clusters_bulk') +
ggtitle('Samples from bulk')
saveRDS(NK, "nk.rds")